* This script contains the code to replicate Figures 5 and 6 in Ductor, L., Fafchamps, M., Goyal, S. and van der Leij, M., "Social Networks and Research Output", The Review of Economics and Statistics.

/* Session setup: open the log, load the estimation sample, configure Stata. */

/* A log left open by an earlier (possibly failed) run would make -log using- abort. */
capture log close
log using Figures5-6.log, replace

/* -clear- lets the script be re-run when a modified dataset is already in memory. */
use prodndnp_fullsample.dta, clear

/* Matrix-size setting kept from the original script. */
set matsize 800
/* Do not pause the output with -more- prompts. */
set more off
/*
  Out-of-sample forecast comparison, repeated for every value of t from 3 to 20.

  For each t, four OLS models of lprodf3 are estimated on group==1 (the
  in-sample group) and used to predict lprodf3 for group==2 (the
  out-of-sample group):
    Model 0: y2-y27 and nopapers
    Model 1: Model 0 regressors plus lprodf3l-lprodf3l13
    Model 2: Model 0 regressors plus the 1y-12y network variables
    Model 3: Model 1 regressors plus the 1y-6y network variables

  For every model the loop stores the in-sample N, R-squared and RMSE, computes
  the out-of-sample RMSE as sqrt(sum of squared prediction errors / number of
  predictions), and writes the statistics with outreg2. Models 1-3 are also
  compared with a reference model (M1 vs M0, M2 vs M1, M3 vs M1) through
    - a constant-only -newey, lag(0)- regression of the difference in squared
      prediction errors, and
    - the percentage reduction in out-of-sample RMSE.

  NOTE(review): outreg2 is called without replace, so the output files
  presumably accumulate one column per call, including columns left by earlier
  runs of this script. Delete old careertimeM*.txt files before re-running,
  and confirm against the outreg2 help.
  NOTE(review): the local macro varlist used in drop() is not defined anywhere
  in this script, so drop() presumably receives an empty list. TODO confirm.
*/
forval i = 3(1)20 {

    /*MODEL 0*/
    /*OLS out-of-sample program*/
    qui reg lprodf3 y2-y27 nopapers if group==1 & t==`i'
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)
    /* linear prediction for the out-of-sample group at this t */
    predict ybols if group==2 & t==`i' & lprodf3<., xb 
    scalar r2i1ols= e(r2)   /* in-sample R-squared of this model */
    qui gen sqerrolsM0= (lprodf3-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrolsM0)if ybols<.  /* sum of squared prediction errors, repeated on every predicted observation */
    quietly sum peols
    quietly scalar rssols=r(max)  /* the total sum of squared prediction errors */
    quietly scalar nols=r(N)    /* number of out-of-sample predictions at this t */
    scalar rmseo2olsM0=(rssols/nols)^0.5   /* RMSE out of sample */
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM0 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    drop peols ybols
    outreg2 using careertimeM0, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM0, t,`i') excel


    /*MODEL 1*/
    /*OLS out-of-sample program*/
    qui reg lprodf3 y2-y27 nopapers lprodf3l-lprodf3l13 if group==1 & t==`i'
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)
    predict ybols if group==2 & t==`i' & lprodf3<., xb 
    scalar r2i1ols= e(r2)   /* in-sample R-squared of this model */
    qui gen sqerrolsM1= (lprodf3-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrolsM1)if ybols<.  /* sum of squared prediction errors */
    quietly sum peols
    quietly scalar rssols=r(max)  /* the total sum of squared prediction errors */
    quietly scalar nols=r(N)    /* number of out-of-sample predictions at this t */
    scalar rmseo2olsM1=(rssols/nols)^0.5   /* RMSE out of sample */
    /* squared-error differential Model 0 minus Model 1, regressed on a constant only */
    gen diffsqerrlM1= sqerrolsM0-sqerrolsM1
    newey diffsqerrlM1 if ybols<., lag(0)
    /* percentage reduction in out-of-sample RMSE relative to Model 0 */
    scalar diffrmseoutM1=((rmseo2olsM0-rmseo2olsM1)/rmseo2olsM0)*100
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM1 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    di diffrmseoutM1 "RMSE % difference between Model 1 and Model 0"
    drop peols ybols 
    outreg2 using careertimeM1unr, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM1, RMSE % diff, diffrmseoutM1, t,`i') excel


    /*MV Model 2*/
    /*OLS out-of-sample program*/
    qui reg lprodf3 y2-y27 nopapers lnetprod1y-lnetprod12y lnetprod21y-lnetprod212y degree1y-degree12y degree21y-degree212y gc1y-gc12y lbet1y-lbet12y lclos1y-lclos12y neiq1fs1y-neiq1fs12y if group==1 & t==`i'
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)
    predict ybols if group==2 & t==`i' & lprodf3<., xb 
    scalar r2i1ols= e(r2)   /* in-sample R-squared of this model */
    qui gen sqerrolsM2= (lprodf3-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrolsM2)if ybols<.  /* sum of squared prediction errors */
    quietly sum peols
    quietly scalar rssols=r(max)  /* the total sum of squared prediction errors */
    quietly scalar nols=r(N)    /* number of out-of-sample predictions at this t */
    scalar rmseo2olsM2=(rssols/nols)^0.5   /* RMSE out of sample */
    /* squared-error differential Model 1 minus Model 2, regressed on a constant only */
    gen diffsqerrlM2= sqerrolsM1-sqerrolsM2
    newey diffsqerrlM2 if ybols<., lag(0)
    /* percentage reduction in out-of-sample RMSE relative to Model 1 */
    scalar diffrmseoutM2=((rmseo2olsM1-rmseo2olsM2)/rmseo2olsM1)*100
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM2 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    /* the reference model is Model 1 (see the scalar above); the original message said Model 0 */
    di diffrmseoutM2 "RMSE % difference between Model 2 and Model 1"
    drop peols ybols
    outreg2 using careertimeM2unr, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM2, RMSE % diff, diffrmseoutM2, t,`i') excel


    /*MV Model 3*/
    /*OLS out-of-sample program*/
    qui reg lprodf3 y2-y27 nopapers lprodf3l-lprodf3l13 lnetprod1y-lnetprod6y lnetprod21y-lnetprod26y degree1y-degree6y degree21y-degree26y gc1y-gc6y lbet1y-lbet6y lclos1y-lclos6y neiq1fs1y-neiq1fs6y if group==1 & t==`i'
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)
    predict ybols if group==2 & t==`i' & lprodf3<., xb 
    scalar r2i1ols= e(r2)   /* in-sample R-squared of this model */
    qui gen sqerrolsM3= (lprodf3-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrolsM3)if ybols<.  /* sum of squared prediction errors */
    quietly sum peols
    quietly scalar rssols=r(max)  /* the total sum of squared prediction errors */
    quietly scalar nols=r(N)    /* number of out-of-sample predictions at this t */
    scalar rmseo2olsM3=(rssols/nols)^0.5   /* RMSE out of sample */
    /* squared-error differential Model 1 minus Model 3, regressed on a constant only */
    gen diffsqerrlM3= sqerrolsM1-sqerrolsM3
    newey diffsqerrlM3 if ybols<., lag(0)
    /* percentage reduction in out-of-sample RMSE relative to Model 1 */
    scalar diffrmseoutM3=((rmseo2olsM1-rmseo2olsM3)/rmseo2olsM1)*100
    di rmseols   "  Pooled OLS in-sample RMSE"
    di n1 " Number of observations in the in-sample group"
    di rmseo2olsM3 "  Pooled OLS out-of-sample RMSE"
    di nols " Number of observations in the out-of-sample group"
    di diffrmseoutM3 "RMSE % difference between Model 3 and Model 1"
    drop peols ybols
    outreg2 using careertimeM3unr, drop(`varlist') nocon dec(3) nose long addstat(n_in, n1,n_os,nols, R-squared,r2i1ols, RMSE,rmseo2olsM3, RMSE % diff, diffrmseoutM3, t,`i') excel

    /* remove the per-iteration work variables so the next t can recreate them */
    drop sqerrolsM0 sqerrolsM1 sqerrolsM2 sqerrolsM3 diffsqerrlM1 diffsqerrlM2 diffsqerrlM3
}
log close


/* Data editing: turn the outreg2 text output of Model 1 into a dataset
   with one observation per outreg2 column and one variable per statistic. */

insheet using "careertimeM1unr.txt", clear

/* Only rows 5 to 11 of the text file are used (seven rows, named below). */
keep if inrange(_n, 5, 11)
rename v1 variable

/* Stack the columns: one observation per (row label, column), indexed by _j. */
reshape long v, i(variable)
encode variable, gen(varlabel)

/* Convert the cells from text to numbers.
   NOTE(review): the documented form is ignore("chars"); confirm that
   ignore(,) strips thousands separators on the Stata version in use. */
destring v, generate(vnum) ignore(,)
drop v
rename vnum v
drop variable

/* Back to wide: v1-v7 follow the numeric codes that -encode- gave the row labels. */
reshape wide v, i(_j) j(varlabel)
drop _j

rename v1 obs
rename v2 rm1
rename v3 rmsem1
rename v4 rmsediffm1m0
rename v5 nin
rename v6 nos
rename v7 t

label variable obs `"Observations"'
label variable rm1 `"R-squared M1'"'
label variable rmsem1 `"RMSE M1'"'
label variable rmsediffm1m0 `"RMSE % Diff. MV1' M0"'
label variable nin `"n_in"'
label variable nos `"n_os"'
label variable t `"t"'

save careertimeM1unr,replace


/* Model 2: turn the outreg2 text output into a dataset with one observation
   per outreg2 column and one variable per statistic. */

insheet using "careertimeM2unr.txt", clear

/* Only rows 5 to 11 of the text file are used (seven rows, named below). */
keep if inrange(_n, 5, 11)
rename v1 variable

/* Stack the columns: one observation per (row label, column), indexed by _j. */
reshape long v, i(variable)
encode variable, gen(varlabel)

/* Convert the cells from text to numbers.
   NOTE(review): the documented form is ignore("chars"); confirm that
   ignore(,) strips thousands separators on the Stata version in use. */
destring v, generate(vnum) ignore(,)
drop v
rename vnum v
drop variable

/* Back to wide: v1-v7 follow the numeric codes that -encode- gave the row labels. */
reshape wide v, i(_j) j(varlabel)
drop _j

rename v1 obs
rename v2 rm2
rename v3 rmsem2
rename v4 rmsediffm2m1
rename v5 nin
rename v6 nos
rename v7 t

label variable obs `"Observations"'
label variable rm2 `"R-squared M2'"'
label variable rmsem2 `"RMSE MV2'"'
label variable rmsediffm2m1 `"RMSE % Diff. MV2' M1"'
label variable nin `"n_in"'
label variable nos `"n_os"'
label variable t `"t"'

save careertimeM2unr,replace

/* Model 3: turn the outreg2 text output into a dataset with one observation
   per outreg2 column and one variable per statistic. */

insheet using "careertimeM3unr.txt", clear

/* Only rows 5 to 11 of the text file are used (seven rows, named below). */
keep if inrange(_n, 5, 11)
rename v1 variable

/* Stack the columns: one observation per (row label, column), indexed by _j. */
reshape long v, i(variable)
encode variable, gen(varlabel)

/* Convert the cells from text to numbers.
   NOTE(review): the documented form is ignore("chars"); confirm that
   ignore(,) strips thousands separators on the Stata version in use. */
destring v, generate(vnum) ignore(,)
drop v
rename vnum v
drop variable

/* Back to wide: v1-v7 follow the numeric codes that -encode- gave the row labels. */
reshape wide v, i(_j) j(varlabel)
drop _j

rename v1 obs
rename v2 rm3
rename v3 rmsem3
rename v4 rmsediffm3m1
rename v5 nin
rename v6 nos
rename v7 t

label variable obs `"Observations"'
label variable rm3 `"R-squared M3'"'
label variable rmsem3 `"RMSE MV3'"'
label variable rmsediffm3m1 `"RMSE % Diff. MV3' M1"'
label variable nin `"n_in"'
label variable nos `"n_os"'
label variable t `"t"'

save careertimeM3unr,replace

/* Attach the Model 1 and Model 2 summaries to the Model 3 summary held in
   memory, matching on t, and retain only the series used in the figures. */
joinby t using careertimeM1unr
joinby t using careertimeM2unr

/* Everything except t, the three RMSE series and the two RMSE % differences is discarded. */
keep t rmsem1 rmsem2 rmsem3 rmsediffm2m1 rmsediffm3m1
order t rmsem1 rmsem2 rmsem3 rmsediffm2m1 rmsediffm3m1

save Careertime_unrestricted, replace

/********************* Figures 5 and 6 ***********************************/

/* Figure 5: out-of-sample RMSE of Models 1, 2 and 3 plotted against t. */
twoway (line rmsem1 t) ///
       (line rmsem2 t) ///
       (line rmsem3 t), ///
       xlabel(5(5)20) graphregion(fcolor(white) lcolor(white))
graph export "Figure5.eps", as(eps) preview(off)

/* Figure 6: RMSE % difference of Models 2 and 3 relative to Model 1, plotted against t. */
twoway (line rmsediffm2m1 t) ///
       (line rmsediffm3m1 t), ///
       xlabel(5(5)20) graphregion(fcolor(white) lcolor(white))
graph export "Figure6.eps", as(eps) preview(off)




